[ARM] Port AArch64's CSel handling patterns to Thumb2 #152945
Conversation
@llvm/pr-subscribers-backend-arm

Author: AZero13 (AZero13)

Changes

Patch is 332.63 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152945.diff

14 Files Affected:
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 8f56fb0938dd0..cd26e19378170 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -5765,6 +5765,15 @@ let Predicates = [HasV8_1MMainline] in {
def : T2Pat<(ARMcmov (i32 0), (i32 -1), imm:$cc, CPSR),
(t2CSINV ZR, ZR, (inv_cond_XFORM imm:$cc))>;
+ def : T2Pat<(ARMcmov GPRwithZR:$tval, (i32 1), imm:$cc, CPSR),
+ (t2CSINC GPRwithZR:$tval, ZR, imm:$cc)>;
+ def : T2Pat<(ARMcmov (i32 1), GPRwithZR:$fval, imm:$cc, CPSR),
+ (t2CSINC GPRwithZR:$fval, ZR, (inv_cond_XFORM imm:$cc))>;
+ def : T2Pat<(ARMcmov GPRwithZR:$tval, (i32 -1), imm:$cc, CPSR),
+ (t2CSINV GPRwithZR:$tval, ZR, imm:$cc)>;
+ def : T2Pat<(ARMcmov (i32 -1), GPRwithZR:$fval, imm:$cc, CPSR),
+ (t2CSINV GPRwithZR:$fval, ZR, (inv_cond_XFORM imm:$cc))>;
+
multiclass ModifiedV8_1CSEL<Instruction Insn, dag modvalue> {
def : T2Pat<(ARMcmov modvalue, GPRwithZR:$tval, imm:$cc, CPSR),
(Insn GPRwithZR:$tval, GPRwithZR:$fval, imm:$cc)>;
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index 8ab56b228d2a7..87a1221088e09 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -1039,8 +1039,8 @@ define i64 @stest_f64i64(double %x) {
;
; FULL-LABEL: stest_f64i64:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: bl __fixdfti
; FULL-NEXT: subs.w lr, r0, #-1
; FULL-NEXT: mvn r12, #-2147483648
@@ -1049,20 +1049,20 @@ define i64 @stest_f64i64(double %x) {
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csinv r0, r0, zr, eq
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r3, r3, lr, ne
; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: mov.w lr, #-2147483648
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: it ge
; FULL-NEXT: movge r0, #0
-; FULL-NEXT: csel r1, r1, r12, lt
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, lt
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi double %x to i128
%0 = icmp slt i128 %conv, 9223372036854775807
@@ -1295,8 +1295,8 @@ define i64 @stest_f32i64(float %x) {
;
; FULL-LABEL: stest_f32i64:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: bl __fixsfti
; FULL-NEXT: subs.w lr, r0, #-1
; FULL-NEXT: mvn r12, #-2147483648
@@ -1305,20 +1305,20 @@ define i64 @stest_f32i64(float %x) {
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csinv r0, r0, zr, eq
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r3, r3, lr, ne
; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: mov.w lr, #-2147483648
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: it ge
; FULL-NEXT: movge r0, #0
-; FULL-NEXT: csel r1, r1, r12, lt
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, lt
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi float %x to i128
%0 = icmp slt i128 %conv, 9223372036854775807
@@ -1556,8 +1556,8 @@ define i64 @stest_f16i64(half %x) {
;
; FULL-LABEL: stest_f16i64:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: vmov.f16 r0, s0
; FULL-NEXT: vmov s0, r0
; FULL-NEXT: bl __fixhfti
@@ -1568,20 +1568,20 @@ define i64 @stest_f16i64(half %x) {
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csinv r0, r0, zr, eq
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r3, r3, lr, ne
; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: mov.w lr, #-2147483648
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: it ge
; FULL-NEXT: movge r0, #0
-; FULL-NEXT: csel r1, r1, r12, lt
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, lt
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi half %x to i128
%0 = icmp slt i128 %conv, 9223372036854775807
@@ -2708,8 +2708,8 @@ define i64 @stest_f64i64_mm(double %x) {
;
; FULL-LABEL: stest_f64i64_mm:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: bl __fixdfti
; FULL-NEXT: subs.w lr, r0, #-1
; FULL-NEXT: mvn r12, #-2147483648
@@ -2718,21 +2718,21 @@ define i64 @stest_f64i64_mm(double %x) {
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csinv r0, r0, zr, eq
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r3, r3, lr, ne
; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: mov.w lr, #-2147483648
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: cset r2, lt
; FULL-NEXT: cmp r2, #0
; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r12, ne
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, ne
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi double %x to i128
%spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
@@ -3021,8 +3021,8 @@ define i64 @stest_f32i64_mm(float %x) {
;
; FULL-LABEL: stest_f32i64_mm:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: bl __fixsfti
; FULL-NEXT: subs.w lr, r0, #-1
; FULL-NEXT: mvn r12, #-2147483648
@@ -3031,21 +3031,21 @@ define i64 @stest_f32i64_mm(float %x) {
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csinv r0, r0, zr, eq
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r3, r3, lr, ne
; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: mov.w lr, #-2147483648
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: cset r2, lt
; FULL-NEXT: cmp r2, #0
; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r12, ne
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, ne
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi float %x to i128
%spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
@@ -3339,8 +3339,8 @@ define i64 @stest_f16i64_mm(half %x) {
;
; FULL-LABEL: stest_f16i64_mm:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: vmov.f16 r0, s0
; FULL-NEXT: vmov s0, r0
; FULL-NEXT: bl __fixhfti
@@ -3351,21 +3351,21 @@ define i64 @stest_f16i64_mm(half %x) {
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csinv r0, r0, zr, eq
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r3, r3, lr, ne
; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: mov.w lr, #-2147483648
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: cset r2, lt
; FULL-NEXT: cmp r2, #0
; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r12, ne
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, ne
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi half %x to i128
%spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
diff --git a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
index 5179f97624489..a42a2a8083f6f 100644
--- a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
@@ -633,40 +633,74 @@ define i50 @test_signed_i50_f32(float %f) nounwind {
; SOFT-NEXT: .LCPI6_2:
; SOFT-NEXT: .long 131071 @ 0x1ffff
;
-; VFP-LABEL: test_signed_i50_f32:
-; VFP: @ %bb.0:
-; VFP-NEXT: .save {r4, lr}
-; VFP-NEXT: push {r4, lr}
-; VFP-NEXT: mov r4, r0
-; VFP-NEXT: bl __aeabi_f2lz
-; VFP-NEXT: vldr s0, .LCPI6_0
-; VFP-NEXT: vmov s2, r4
-; VFP-NEXT: vldr s4, .LCPI6_1
-; VFP-NEXT: vcmp.f32 s2, s0
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: ittt lt
-; VFP-NEXT: movlt r1, #0
-; VFP-NEXT: movtlt r1, #65534
-; VFP-NEXT: movlt r0, #0
-; VFP-NEXT: vcmp.f32 s2, s4
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: it gt
-; VFP-NEXT: movgt.w r0, #-1
-; VFP-NEXT: vcmp.f32 s2, s2
-; VFP-NEXT: itt gt
-; VFP-NEXT: movwgt r1, #65535
-; VFP-NEXT: movtgt r1, #1
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itt vs
-; VFP-NEXT: movvs r0, #0
-; VFP-NEXT: movvs r1, #0
-; VFP-NEXT: pop {r4, pc}
-; VFP-NEXT: .p2align 2
-; VFP-NEXT: @ %bb.1:
-; VFP-NEXT: .LCPI6_0:
-; VFP-NEXT: .long 0xd8000000 @ float -5.62949953E+14
-; VFP-NEXT: .LCPI6_1:
-; VFP-NEXT: .long 0x57ffffff @ float 5.6294992E+14
+; VFP2-LABEL: test_signed_i50_f32:
+; VFP2: @ %bb.0:
+; VFP2-NEXT: .save {r4, lr}
+; VFP2-NEXT: push {r4, lr}
+; VFP2-NEXT: mov r4, r0
+; VFP2-NEXT: bl __aeabi_f2lz
+; VFP2-NEXT: vldr s0, .LCPI6_0
+; VFP2-NEXT: vmov s2, r4
+; VFP2-NEXT: vldr s4, .LCPI6_1
+; VFP2-NEXT: vcmp.f32 s2, s0
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: ittt lt
+; VFP2-NEXT: movlt r1, #0
+; VFP2-NEXT: movtlt r1, #65534
+; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: vcmp.f32 s2, s4
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: it gt
+; VFP2-NEXT: movgt.w r0, #-1
+; VFP2-NEXT: vcmp.f32 s2, s2
+; VFP2-NEXT: itt gt
+; VFP2-NEXT: movwgt r1, #65535
+; VFP2-NEXT: movtgt r1, #1
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itt vs
+; VFP2-NEXT: movvs r0, #0
+; VFP2-NEXT: movvs r1, #0
+; VFP2-NEXT: pop {r4, pc}
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI6_0:
+; VFP2-NEXT: .long 0xd8000000 @ float -5.62949953E+14
+; VFP2-NEXT: .LCPI6_1:
+; VFP2-NEXT: .long 0x57ffffff @ float 5.6294992E+14
+;
+; FP16-LABEL: test_signed_i50_f32:
+; FP16: @ %bb.0:
+; FP16-NEXT: .save {r4, lr}
+; FP16-NEXT: push {r4, lr}
+; FP16-NEXT: mov r4, r0
+; FP16-NEXT: bl __aeabi_f2lz
+; FP16-NEXT: vldr s0, .LCPI6_0
+; FP16-NEXT: vmov s2, r4
+; FP16-NEXT: vldr s4, .LCPI6_1
+; FP16-NEXT: vcmp.f32 s2, s0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcmp.f32 s2, s4
+; FP16-NEXT: ittt lt
+; FP16-NEXT: movlt r1, #0
+; FP16-NEXT: movtlt r1, #65534
+; FP16-NEXT: movlt r0, #0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcmp.f32 s2, s2
+; FP16-NEXT: itt gt
+; FP16-NEXT: movwgt r1, #65535
+; FP16-NEXT: movtgt r1, #1
+; FP16-NEXT: csinv r0, r0, zr, gt
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: itt vs
+; FP16-NEXT: movvs r0, #0
+; FP16-NEXT: movvs r1, #0
+; FP16-NEXT: pop {r4, pc}
+; FP16-NEXT: .p2align 2
+; FP16-NEXT: @ %bb.1:
+; FP16-NEXT: .LCPI6_0:
+; FP16-NEXT: .long 0xd8000000 @ float -5.62949953E+14
+; FP16-NEXT: .LCPI6_1:
+; FP16-NEXT: .long 0x57ffffff @ float 5.6294992E+14
%x = call i50 @llvm.fptosi.sat.i50.f32(float %f)
ret i50 %x
}
@@ -735,37 +769,69 @@ define i64 @test_signed_i64_f32(float %f) nounwind {
; SOFT-NEXT: .LCPI7_1:
; SOFT-NEXT: .long 2147483647 @ 0x7fffffff
;
-; VFP-LABEL: test_signed_i64_f32:
-; VFP: @ %bb.0:
-; VFP-NEXT: .save {r4, lr}
-; VFP-NEXT: push {r4, lr}
-; VFP-NEXT: mov r4, r0
-; VFP-NEXT: bl __aeabi_f2lz
-; VFP-NEXT: vldr s0, .LCPI7_0
-; VFP-NEXT: vmov s2, r4
-; VFP-NEXT: vldr s4, .LCPI7_1
-; VFP-NEXT: vcmp.f32 s2, s0
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itt lt
-; VFP-NEXT: movlt r0, #0
-; VFP-NEXT: movlt.w r1, #-2147483648
-; VFP-NEXT: vcmp.f32 s2, s4
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itt gt
-; VFP-NEXT: mvngt r1, #-2147483648
-; VFP-NEXT: movgt.w r0, #-1
-; VFP-NEXT: vcmp.f32 s2, s2
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itt vs
-; VFP-NEXT: movvs r0, #0
-; VFP-NEXT: movvs r1, #0
-; VFP-NEXT: pop {r4, pc}
-; VFP-NEXT: .p2align 2
-; VFP-NEXT: @ %bb.1:
-; VFP-NEXT: .LCPI7_0:
-; VFP-NEXT: .long 0xdf000000 @ float -9.22337203E+18
-; VFP-NEXT: .LCPI7_1:
-; VFP-NEXT: .long 0x5effffff @ float 9.22337149E+18
+; VFP2-LABEL: test_signed_i64_f32:
+; VFP2: @ %bb.0:
+; VFP2-NEXT: .save {r4, lr}
+; VFP2-NEXT: push {r4, lr}
+; VFP2-NEXT: mov r4, r0
+; VFP2-NEXT: bl __aeabi_f2lz
+; VFP2-NEXT: vldr s0, .LCPI7_0
+; VFP2-NEXT: vmov s2, r4
+; VFP2-NEXT: vldr s4, .LCPI7_1
+; VFP2-NEXT: vcmp.f32 s2, s0
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itt lt
+; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: movlt.w r1, #-2147483648
+; VFP2-NEXT: vcmp.f32 s2, s4
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itt gt
+; VFP2-NEXT: mvngt r1, #-2147483648
+; VFP2-NEXT: movgt.w r0, #-1
+; VFP2-NEXT: vcmp.f32 s2, s2
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itt vs
+; VFP2-NEXT: movvs r0, #0
+; VFP2-NEXT: movvs r1, #0
+; VFP2-NEXT: pop {r4, pc}
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI7_0:
+; VFP2-NEXT: .long 0xdf000000 @ float -9.22337203E+18
+; VFP2-NEXT: .LCPI7_1:
+; VFP2-NEXT: .long 0x5effffff @ float 9.22337149E+18
+;
+; FP16-LABEL: test_signed_i64_f32:
+; FP16: @ %bb.0:
+; FP16-NEXT: .save {r4, lr}
+; FP16-NEXT: push {r4, lr}
+; FP16-NEXT: mov r4, r0
+; FP16-NEXT: bl __aeabi_f2lz
+; FP16-NEXT: vldr s0, .LCPI7_0
+; FP16-NEXT: vmov s2, r4
+; FP16-NEXT: vldr s4, .LCPI7_1
+; FP16-NEXT: vcmp.f32 s2, s0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcmp.f32 s2, s4
+; FP16-NEXT: itt lt
+; FP16-NEXT: movlt r0, #0
+; FP16-NEXT: movlt.w r1, #-2147483648
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: it gt
+; FP16-NEXT: mvngt r1, #-2147483648
+; FP16-NEXT: vcmp.f32 s2, s2
+; FP16-NEXT: csinv r0, r0, zr, gt
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: itt vs
+; FP16-NEXT: movvs r0, #0
+; FP16-NEXT: movvs r1, #0
+; FP16-NEXT: pop {r4, pc}
+; FP16-NEXT: .p2align 2
+; FP16-NEXT: @ %bb.1:
+; FP16-NEXT: .LCPI7_0:
+; FP16-NEXT: .long 0xdf000000 @ float -9.22337203E+18
+; FP16-NEXT: .LCPI7_1:
+; FP16-NEXT: .long 0x5effffff @ float 9.22337149E+18
%x = call i64 @llvm.fptosi.sat.i64.f32(float %f)
ret i64 %x
}
@@ -880,43 +946,81 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
; SOFT-NEXT: .LCPI8_0:
; SOFT-NEXT: .long 1895825407 @ 0x70ffffff
;
-; VFP-LABEL: test_signed_i100_f32:
-; VFP: @ %bb.0:
-; VFP-NEXT: .save {r4, lr}
-; VFP-NEXT: push {r4, lr}
-; VFP-NEXT: mov r4, r0
-; VFP-NEXT: bl __fixsfti
-; VFP-NEXT: vldr s0, .LCPI8_0
-; VFP-NEXT: vmov s2, r4
-; VFP-NEXT: vldr s4, .LCPI8_1
-; VFP-NEXT: vcmp.f32 s2, s0
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itttt lt
-; VFP-NEXT: movlt r0, #0
-; VFP-NEXT: movlt r1, #0
-; VFP-NEXT: movlt r2, #0
-; VFP-NEXT: mvnlt r3, #7
-; VFP-NEXT: vcmp.f32 s2, s4
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itttt gt
-; VFP-NEXT: movgt r3, #7
-; VFP-NEXT: movgt.w r2, #-1
-; VFP-NEXT: movgt.w r1, #-1
-; VFP-NEXT: movgt.w r0, #-1
-; VFP-NEXT: vcmp.f32 s2, s2
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itttt vs
-; VFP-NEXT: movvs r0, #0
-; VFP-NEXT: movvs r1, #0
-; VFP-NEXT: movvs r2, #0
-; VFP-NEXT: movvs r3, #0
-; VFP-NEXT: pop {r4, pc}
-; VFP-NEXT: .p2align 2
-; VFP-NEXT: @ %bb.1:
-; VFP-NEXT: .LCPI8_0:
-; VFP-NEXT: .long 0xf1000000 @ float -6.338253E+29
-; VFP-NEXT: .LCPI8_1:
-; VFP-NEXT: .long 0x70ffffff @ float 6.33825262E+29
+; VFP2-LABEL: test_signed_i100_f32:
+; VFP2: @ %bb.0:
+; VFP2-NEXT: .save {r4, lr}
+; VFP2-NEXT: push {r4, lr}
+; VFP2-NEXT: mov r4, r0
+; VFP2-NEXT: bl __fixsfti
+; VFP2-NEXT: vldr s0, .LCPI8_0
+; VFP2-NEXT: vmov s2, r4
+; VFP2-NEXT: vldr s4, .LCPI8_1
+; VFP2-NEXT: vcmp.f32 s2, s0
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itttt lt
+; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: movlt r1, #0
+; VFP2-NEXT: movlt r2, #0
+; VFP2-NEXT: mvnlt r3, #7
+; VFP2-NEXT: vcmp.f32 s2, s4
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itttt gt
+; VFP2-NEXT: movgt r3, #7
+; VFP2-NEXT: movgt.w r2, #-1
+; VFP2-NEXT: movgt.w r1, #-1
+; VFP2-NEXT: movgt.w r0, #-1
+; VFP2-NEXT: vcmp.f32 s2, s2
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itttt vs
+; VFP2-NEXT: movvs r0, #0
+; VFP2-NEXT: movvs r1, #0
+; VFP2-NEXT: movvs r2, #0
+; VFP2-NEXT: movvs r3, #0
+; VFP2-NEXT: pop {r4, pc}
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI8_0:
+; VFP2-NEXT: .long 0xf1000000 @ float -6.338253E+29
+; VFP2-NEXT: .LCPI8_1:
+; VFP2-NEXT: .long 0x70ffffff @ float 6.33825262E+29
+;
+; FP16-LABEL: test_signed_i100_f32:
+; FP16: @ %bb.0:
+; FP16-NEXT: .save {r4, lr}
+; FP16-NEXT: push {r4, lr}
+; FP16-NEXT: mov r4, r0
+; FP16-NEXT: bl __fixsfti
+; FP16-NEXT: vldr s0, .LCPI8_0
+; FP16-NEXT: vmov s2, r4
+; FP16-NEXT: vldr s4, .LCPI8_1
+; FP16-NEXT: vcmp.f32 s2, s0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcmp.f32 s2, s4
+; FP16-NEXT: itttt lt
+; FP16-NEXT: movlt r0, #0
+; FP16-NEXT: movlt r1, #0
+; FP16-NEXT: movlt r2, #0
+; FP16-NEXT: mvnlt r3, #7
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: it gt
+; FP16-NEXT: movgt r3, #7
+; FP16-NEXT: vcmp.f32 s2, s2
+; FP16-NEXT: csinv r2, r2, zr, gt
+; FP16-NEXT: csinv r1, r1, zr, gt
+; FP16-NEXT: csinv r0, r0, zr, gt
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: itttt vs
+; FP16-NEXT: movvs r0, #0
+; FP...
[truncated]
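For readers skimming the TableGen hunk above: the new patterns let a select whose untaken arm is the constant 1 or -1 fold into a csinc/csinv of zr, instead of first materializing the constant in a register. A minimal illustration (hypothetical IR, not taken from the patch's test suite; the function name, registers, and exact condition code are assumptions that depend on how the compare is lowered):

```llvm
define i32 @select_const_one(i32 %a, i32 %b) {
  ; On a v8.1-M target, "%c ? %b : 1" is intended to lower to a single
  ; "csinc rN, rB, zr, cc" (zr + 1 == 1), avoiding a separate "mov rM, #1".
  %c = icmp eq i32 %a, 0
  %r = select i1 %c, i32 %b, i32 1
  ret i32 %r
}
```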
Isn't this just "Port AArch64's CSel handling patterns to Thumb2", and not ARM in general?
Yeah |
ARMcmov works the other way around, so this inverts the condition for the wrong patterns. Make sure you test your patches thoroughly; surprising things can go wrong. (Otherwise this sounds like a useful addition, provided the longer encoding of a csel doesn't outweigh the benefit of usually not requiring an IT block.)
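A sketch of the operand-order pitfall being flagged here (editorial annotation, not part of the review; semantics paraphrased from the MOVCC pseudo and CSINC definitions):

```llvm
; ARMcmov takes the FALSE value first, the opposite of AArch64's CSEL:
;   ARMcmov(%f, %t, cc)    ==> cc ? %t : %f
;   t2CSINC(%n, %m, cc)    ==> cc ? %n : %m + 1
; So (ARMcmov $x, (i32 1), cc), i.e. "cc ? 1 : $x", needs the INVERTED
; condition: (t2CSINC $x, ZR, inv(cc)), not (t2CSINC $x, ZR, cc).
```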